package com.spark.mooc.ch6_sparksql.part01_createDataFrame

import org.apache.spark.sql.{DataFrame, DataFrameReader, SparkSession}

/**
 * @description:
 * @time: 2020/11/28 17:38
 * @author: lhy
 */
/**
 * Demonstrates basic DataFrame usage: creation from JSON, saving to CSV,
 * and common transformations (select, filter, groupBy, sort).
 */
object TestDataFrame {
    def main(args: Array[String]): Unit = {
        /*
         1. Create a DataFrame from a JSON file
         */
        val spark: SparkSession = SparkSession.builder().appName("dataFrame").master("local").getOrCreate()
        import spark.implicits._
        val df: DataFrame = spark.read.json("data.txt/people.json")
        df.show()
        /*
         2. Save a DataFrame
         */
        val peopleDF: DataFrame = spark.read.format("json").load("data.txt/people.json")
        // mode("overwrite") keeps the demo re-runnable: without it, save() throws
        // AnalysisException once the output directory exists.
        peopleDF.select("name","age").write.format("csv").mode("overwrite").save("output/dataFrame/newPeople.csv")
        peopleDF.show()
        /*
         3. Common DataFrame operations
         */
        // Print the schema of the DataFrame
        df.printSchema()
        // Projection: select columns (age shifted by 1)
        df.select(df("name"),df("age")+1).show()
        // Projection with a column alias
        df.select(df("name"),df("age")+1 as "age" ).show()
        // Filtering rows by predicate
        df.filter(df("age") > 20).show()
        // Grouped aggregation: row count per age
        df.groupBy("age").count().show()
        // Sorting by age, descending
        df.sort(df("age").desc).show()
        // Release the session's resources before the JVM exits.
        spark.stop()
    }
}
